*
*	Tidy applicant-level data
*

* Load the raw applicant file for the year held in the global macro "year"
use "${raw_data_applicants_${year}}", clear

* Keep only the main round (eljaras == 1)
keep if eljaras == 1

	* Applicant key: the letter "A" followed by the group number that egen
	* assigns to each distinct value of id
	sort id
	egen applicant_id_temp = group(id)
	tostring applicant_id_temp, replace
	generate applicant_id = "A" + applicant_id_temp
	drop applicant_id_temp
	label variable applicant_id "Applicant id"

	* Constant year column taken from the global macro
	generate year = ${year}
	label variable year "Year"
* ------------------------------------------------------------------------------
* 1 Eligibility for funding 
* ------------------------------------------------------------------------------
	* Previous experience w/ funded semesters: funded_semester is the sum of the
	* non-missing allami_felev1-allami_felev13 values that exist in this file.
	gen funded_semester = 0

	forvalues i = 1/13 {
		* Skip slots whose counter is absent or not numeric. This replaces a
		* blanket -capture- on the replace statements, which hid every error.
		capture confirm numeric variable allami_felev`i'
		if _rc {
			continue
		}

		* Impute 7 when the counter is missing but vegbiz`i' is filled in.
		* NOTE(review): the rationale for the value 7 is not visible here - confirm.
		capture confirm numeric variable vegbiz`i'
		if !_rc {
			quietly replace allami_felev`i' = 7 if allami_felev`i' == . & vegbiz`i' != .
		}

		quietly replace funded_semester = funded_semester + allami_felev`i' if allami_felev`i' != .
	}

	* Eligibility based on nationality: one of the listed values of allamp, or
	* hataron == 1. String inlist() accepts at most nine values per call, hence
	* the three groups.
	* NOTE(review): the list is reproduced as-is; confirm it is complete.
	gen eligibility_national = ///
		   inlist(allamp, "belga", "bolgár", "brit", "cseh", "finn", ///
				"horvát", "lengyel", "lett", "luxemburgi") ///
		 | inlist(allamp, "magyar", "norvég", "német", "olasz", "osztrák", ///
				"portugál", "román", "spanyol", "svájci") ///
		 | inlist(allamp, "svéd", "szlovák", "szlovén", "francia", "görög", ///
				"holland", "litván", "észt", "ciprusi") ///
		 | hataron == 1

	* Final eligibility measure: fewer than 12 funded semesters AND eligible
	* by nationality
	gen eligibility = (funded_semester < 12) & eligibility_national == 1

	drop eligibility_national

	label variable eligibility "Eligible applicant"
	label define eligibility 0 "No" 1 "Yes"
	label values eligibility eligibility

	* The raw semester-level inputs are no longer needed
	drop allami_felev* vegbiz*

* ------------------------------------------------------------------------------
* 2 Students' GPA in high-school
* ------------------------------------------------------------------------------	
	* For grades 11 and 12: pull the subject grades out of the six
	* targy<i> / targy<i>_jegy<j> slots, build a GPA, and standardize it.
	forvalues j = 11/12 {
		foreach subj in literature grammar math history {
			gen grade_`subj'_`j' = .
		}

		* A subject can sit in any of the six slots; take the grade from the slot
		* whose subject name matches and whose grade is non-missing.
		forvalues i = 1/6 {
			replace grade_literature_`j' = targy`i'_jegy`j' if targy`i' == "magyar irodalom" & targy`i'_jegy`j' != .
			replace grade_grammar_`j' = targy`i'_jegy`j' if targy`i' == "magyar nyelvtan" & targy`i'_jegy`j' != .
			replace grade_math_`j' = targy`i'_jegy`j' if targy`i' == "matematika" & targy`i'_jegy`j' != .
			replace grade_history_`j' = targy`i'_jegy`j' if ///
				(targy`i' == "történelem" | ///
				 targy`i' == "történelem és társadalomismeret" |  ///
				 targy`i' == "történelem és állampolgári ismeretek") /// 
				& targy`i'_jegy`j' != .
		}

		* GPA = mean of math and the literature/grammar average. It is missing
		* whenever any of the three components is missing. The history grade is
		* extracted above but does not enter this formula.
		gen GPA`j' = (grade_math_`j' + 0.5*grade_literature_`j' + 0.5*grade_grammar_`j')/2
		gen GPA`j'_missing = GPA`j' == .

		* Standardize within year
		bys year: egen GPA`j'_mean = mean(GPA`j')
		bys year: egen GPA`j'_sd  = sd(GPA`j')
		bys year: gen  GPA`j'_std = (GPA`j'-GPA`j'_mean)/GPA`j'_sd

		* Drop only the two temporaries created above; a wildcard such as
		* "*_mean *_sd" would also remove any unrelated variable with that suffix.
		drop GPA`j'_mean GPA`j'_sd
	}

	lab var GPA11 "11th-grade GPA"
	lab var GPA11_missing "11th-grade GPA - missing"
	lab var GPA11_std "11th-grade GPA (standardized)"

	lab var GPA12 "12th-grade GPA"
	lab var GPA12_missing "12th-grade GPA - missing"
	lab var GPA12_std "12th-grade GPA (standardized)"

* ------------------------------------------------------------------------------	
* 3 Tidy-up variables	
* ------------------------------------------------------------------------------

	* Age: application year minus szulev
	generate age = year - szulev
	label variable age "Age (yrs)"

	* nem is renamed to female; its coding is taken as-is from the raw file
	rename nem female
	label variable female "Female"

	* School type, plus one 0/1 indicator for each of the codes 1, 2 and 3
	rename oszttip3 schooltype
	label variable schooltype "Type of secondary school"

	generate sec_school = (schooltype == 1)
	generate voc_school = (schooltype == 2)
	generate other_school = (schooltype == 3)

	label variable sec_school "\ - secondary grammar school"
	label variable voc_school "\ - vocational school"
	label variable other_school "\ - other school"

	* Residence
	rename teltip8 location_size
	label variable location_size "Size of the town (8)"

	* Collapse codes 4 and above into 4. Missing values are excluded explicitly:
	* Stata orders missing above every number, so a bare ">= 4" would recode a
	* missing location to 4 and flag it as a village below.
	replace location_size = 4 if location_size >= 4 & !missing(location_size)

	* One 0/1 indicator per collapsed category. Observations with a missing
	* location_size get 0 on all four indicators.
	gen capital = (location_size == 1)
	label variable capital "\ - capital"
	gen countycapital = (location_size == 2)
	label variable countycapital "\ - county capital"
	gen town = (location_size == 3)
	label variable town "\ - town"
	gen village = (location_size == 4)
	label variable village "\ - village"

	* Give the remaining raw identifiers English names ...
	rename kist173 NUTSregion
	rename irszam zipcode
	rename omkod schoolid
	rename erettsegi_ev exam_year
	rename allamp nationality

	* ... and label them
	label variable NUTSregion "NUTS region (173)"
	label variable zipcode "Postal code"
	label variable schoolid "School id"
	label variable exam_year "Year of the school-leaving exam"
	label variable nationality "Nationality"
	
	* Disadvantaged: hh / hhh switch to 1 when any of the 20 tobblet_hh<k> /
	* tobblet_hhh<k> slots is positive, not equal to . and not equal to 999.
	* -capture- lets the loop pass over slots that cannot be evaluated.
	foreach flag in hh hhh {
		gen `flag' = 0
		forvalues k = 1/20 {
			cap replace `flag' = 1 if (tobblet_`flag'`k' > 0 & tobblet_`flag'`k' != . & tobblet_`flag'`k' != 999)
		}
	}

	* Either flag marks the applicant as disadvantaged
	gen disadv = hh == 1 | hhh == 1
	label variable disadv "Disadvantaged status"

	* Number of contracts: count the osszpont1-osszpont20 slots that are not
	* equal to . (slots that cannot be evaluated are skipped by -capture-)
	gen contract_num = 0
	forvalues k = 1/20 {
		cap replace contract_num = contract_num + (osszpont`k' != .)
	}
	label variable contract_num "Number of contracts on ROL"
	
	* High-school seniors: exam year equals the application year and age is at
	* most 21. A missing age or exam_year fails these comparisons and yields 0.
	gen sample_hss = (year == exam_year) & (age <= 21)
	label variable sample_hss "High-school senior applicant"

	* Country: copy of orszag
	generate country = orszag
	
* ------------------------------------------------------------------------------	
* 4 Save data	
* ------------------------------------------------------------------------------
	* First pass: variables to retain, grouped by theme
	local keep_ids    applicant_id id tazon
	local keep_person szulev szulho female age nationality country
	local keep_place  zipcode NUTSregion location_size capital countycapital town village
	local keep_school schoolid exam_year sec_school voc_school other_school
	local keep_gpa    GPA11 GPA11_missing GPA11_std GPA12 GPA12_missing GPA12_std grade*
	local keep_status year eligibility disadv sample_hss contract_num funded_*

	keep `keep_ids' `keep_person' `keep_place' `keep_school' `keep_gpa' `keep_status'

	* Second pass: remove variables that survived the keep but are not saved
	local drop_grades
	forvalues j = 11/12 {
		foreach subj in literature grammar math history {
			local drop_grades `drop_grades' grade_`subj'_`j'
		}
	}

	drop szulev szulho zipcode funded_semester `drop_grades' ///
		GPA12 GPA12_missing GPA12_std country

	* Shrink storage types and write the tidy file for this year
	compress
	save "${data_applicants_${year}}", replace

